Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance #149

Merged
merged 6 commits into from
Oct 12, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 107 additions & 47 deletions detector/filecontent_detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import (
"fmt"
"regexp"
"strings"

"sync"
"talisman/gitrepo"

log "github.com/Sirupsen/logrus"
Expand All @@ -31,66 +31,126 @@ func (fc *FileContentDetector) AggressiveMode() *FileContentDetector {
return fc
}

func (fc *FileContentDetector) Test(additions []gitrepo.Addition, ignoreConfig TalismanRCIgnore, result *DetectionResults) {
cc := NewChecksumCompare(additions, ignoreConfig)
for _, addition := range additions {
if ignoreConfig.Deny(addition, "filecontent") || cc.IsScanNotRequired(addition) {
log.WithFields(log.Fields{
"filePath": addition.Path,
}).Info("Ignoring addition as it was specified to be ignored.")
result.Ignore(addition.Path, "filecontent")
continue
}

if string(addition.Name) == DefaultRCFileName {
re := regexp.MustCompile(`(?i)checksum[ \t]*:[ \t]*[0-9a-fA-F]+`)
content := re.ReplaceAllString(string(addition.Data), "")
data := []byte(content)
addition.Data = data
}
type contentType int

base64Results := fc.detectFile(addition.Data, checkBase64)
fillBase46DetectionResults(base64Results, addition, result)
const (
base64Content contentType = iota
hexContent
creditCardContent
)

hexResults := fc.detectFile(addition.Data, checkHex)
fillHexDetectionResults(hexResults, addition, result)
func (ct contentType) getInfo() string {
switch ct {
case base64Content:
return "Failing file as it contains a base64 encoded text."
case hexContent:
return "Failing file as it contains a hex encoded text."
case creditCardContent:
return "Failing file as it contains a potential credit card number."
}
return ""
}

creditCardResults := fc.detectFile(addition.Data, checkCreditCardNumber)
fillCreditCardDetectionResults(creditCardResults, addition, result)
func (ct contentType) getOutput() string {
dineshba marked this conversation as resolved.
Show resolved Hide resolved
switch ct {
case base64Content:
return "Expected file to not to contain base64 encoded texts such as: %s"
case hexContent:
return "Expected file to not to contain hex encoded texts such as: %s"
case creditCardContent:
return "Expected file to not to contain credit card numbers such as: %s"
}

return ""
}

func fillResults(results []string, addition gitrepo.Addition, result *DetectionResults, info string, output string) {
for _, res := range results {
if res != "" {
log.WithFields(log.Fields{
"filePath": addition.Path,
}).Info(info)
type content struct {
name gitrepo.FileName
path gitrepo.FilePath
contentType contentType
results []string
}

func (fc *FileContentDetector) Test(additions []gitrepo.Addition, ignoreConfig TalismanRCIgnore, result *DetectionResults) {
cc := NewChecksumCompare(additions, ignoreConfig)
re := regexp.MustCompile(`(?i)checksum[ \t]*:[ \t]*[0-9a-fA-F]+`)

contents := make(chan content, 512)
ignoredFilePaths := make(chan gitrepo.FilePath, len(additions))

waitGroup := sync.WaitGroup{}
waitGroup.Add(len(additions))
for _, addition := range additions {
go func(waitGroup *sync.WaitGroup, addition gitrepo.Addition) {
defer waitGroup.Done()
if ignoreConfig.Deny(addition, "filecontent") || cc.IsScanNotRequired(addition) {
ignoredFilePaths <- addition.Path
return
}

if string(addition.Name) == DefaultRCFileName {
result.Warn(addition.Path, "filecontent", fmt.Sprintf(output, res), []string{})
} else {
result.Fail(addition.Path, "filecontent", fmt.Sprintf(output, res), []string{})
content := re.ReplaceAllString(string(addition.Data), "")
data := []byte(content)
addition.Data = data
}
contents <- content{
name: addition.Name,
path: addition.Path,
contentType: base64Content,
results: fc.detectFile(addition.Data, checkBase64),
}
contents <- content{
name: addition.Name,
path: addition.Path,
contentType: hexContent,
results: fc.detectFile(addition.Data, checkHex),
}
contents <- content{
name: addition.Name,
path: addition.Path,
contentType: creditCardContent,
results: fc.detectFile(addition.Data, checkCreditCardNumber),
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lot of repeated code here.
I wonder if repetition could be avoided ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line 96 to 112, we are just creating 3 structs and sending to channel. I am not very sure how to reduce that. If you have any idea, please let me know.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refactored this. Please review it

}(&waitGroup, addition)
}
go func(waitGroup *sync.WaitGroup) {
waitGroup.Wait()
close(ignoredFilePaths)
close(contents)
}(&waitGroup)

for ignoredChanHasMore, contentChanHasMore := true, true; ignoredChanHasMore || contentChanHasMore; {
select {
case ignoredFilePath, hasMore := <-ignoredFilePaths:
processIgnoredFilepath(ignoredFilePath, result)
ignoredChanHasMore = hasMore
case c, hasMore := <-contents:
processContent(c, result)
contentChanHasMore = hasMore
}
}
}

func fillBase46DetectionResults(base64Results []string, addition gitrepo.Addition, result *DetectionResults) {
const info = "Failing file as it contains a base64 encoded text."
const output = "Expected file to not to contain base64 encoded texts such as: %s"
fillResults(base64Results, addition, result, info, output)
}

func fillCreditCardDetectionResults(creditCardResults []string, addition gitrepo.Addition, result *DetectionResults) {
const info = "Failing file as it contains a potential credit card number."
const output = "Expected file to not to contain credit card numbers such as: %s"
fillResults(creditCardResults, addition, result, info, output)
func processIgnoredFilepath(path gitrepo.FilePath, result *DetectionResults) {
log.WithFields(log.Fields{
"filePath": path,
}).Info("Ignoring addition as it was specified to be ignored.")
result.Ignore(path, "filecontent")
}

func fillHexDetectionResults(hexResults []string, addition gitrepo.Addition, result *DetectionResults) {
const info = "Failing file as it contains a hex encoded text."
const output = "Expected file to not to contain hex encoded texts such as: %s"
fillResults(hexResults, addition, result, info, output)
func processContent(c content, result *DetectionResults) {
for _, res := range c.results {
if res != "" {
log.WithFields(log.Fields{
"filePath": c.path,
}).Info(c.contentType.getInfo())
if string(c.name) == DefaultRCFileName {
result.Warn(c.path, "filecontent", fmt.Sprintf(c.contentType.getOutput(), res), []string{})
} else {
result.Fail(c.path, "filecontent", fmt.Sprintf(c.contentType.getOutput(), res), []string{})
}
}
}
}

func (fc *FileContentDetector) detectFile(data []byte, getResult fn) []string {
Expand Down
83 changes: 59 additions & 24 deletions detector/pattern_detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,77 @@ package detector

import (
"fmt"
"talisman/gitrepo"

log "github.com/Sirupsen/logrus"
"sync"
"talisman/gitrepo"
)

type PatternDetector struct {
secretsPattern *PatternMatcher
}

type match struct {
name gitrepo.FileName
path gitrepo.FilePath
commits []string
detections []string
}

//Test tests the contents of the Additions to ensure that they don't look suspicious
func (detector PatternDetector) Test(additions []gitrepo.Addition, ignoreConfig TalismanRCIgnore, result *DetectionResults) {
cc := NewChecksumCompare(additions, ignoreConfig)
matches := make(chan match, 512)
ignoredFilePaths := make(chan gitrepo.FilePath, 512)
waitGroup := sync.WaitGroup{}
waitGroup.Add(len(additions))
for _, addition := range additions {
if ignoreConfig.Deny(addition, "filecontent") || cc.IsScanNotRequired(addition) {
log.WithFields(log.Fields{
"filePath": addition.Path,
}).Info("Ignoring addition as it was specified to be ignored.")
result.Ignore(addition.Path, "filecontent")
continue
go func(addition gitrepo.Addition, waitGroup *sync.WaitGroup) {
dineshba marked this conversation as resolved.
Show resolved Hide resolved
defer waitGroup.Done()
if ignoreConfig.Deny(addition, "filecontent") || cc.IsScanNotRequired(addition) {
ignoredFilePaths <- addition.Path
return
}
detections := detector.secretsPattern.check(string(addition.Data))
matches <- match{name: addition.Name, path: addition.Path, detections: detections, commits: addition.Commits}
}(addition, &waitGroup)
}
go func(group *sync.WaitGroup) {
group.Wait()
close(matches)
close(ignoredFilePaths)
}(&waitGroup)
dineshba marked this conversation as resolved.
Show resolved Hide resolved
for i := 0; i < len(additions); i++ {
select {
case match := <-matches:
detector.processMatch(match, result)
case ignore := <-ignoredFilePaths:
detector.processIgnore(ignore, result)
}
detections := detector.secretsPattern.check(string(addition.Data))
for _, detection := range detections {
if detection != "" {
if string(addition.Name) == DefaultRCFileName {
log.WithFields(log.Fields{
"filePath": addition.Path,
"pattern": detection,
}).Warn("Warning file as it matched pattern.")
result.Warn(addition.Path, "filecontent", fmt.Sprintf("Potential secret pattern : %s", detection), addition.Commits)
} else {
log.WithFields(log.Fields{
"filePath": addition.Path,
"pattern": detection,
}).Info("Failing file as it matched pattern.")
result.Fail(addition.Path, "filecontent", fmt.Sprintf("Potential secret pattern : %s", detection), addition.Commits)
}
}
}

func (detector PatternDetector) processIgnore(ignoredFilePath gitrepo.FilePath, result *DetectionResults) {
log.WithFields(log.Fields{
"filePath": ignoredFilePath,
}).Info("Ignoring addition as it was specified to be ignored.")
result.Ignore(ignoredFilePath, "filecontent")
}

func (detector PatternDetector) processMatch(match match, result *DetectionResults) {
for _, detection := range match.detections {
if detection != "" {
if string(match.name) == DefaultRCFileName {
log.WithFields(log.Fields{
"filePath": match.path,
"pattern": detection,
}).Warn("Warning file as it matched pattern.")
result.Warn(match.path, "filecontent", fmt.Sprintf("Potential secret pattern : %s", detection), match.commits)
} else {
log.WithFields(log.Fields{
"filePath": match.path,
"pattern": detection,
}).Info("Failing file as it matched pattern.")
result.Fail(match.path, "filecontent", fmt.Sprintf("Potential secret pattern : %s", detection), match.commits)
}
}
}
Expand Down