forked from gitleaks/gitleaks
/
repo.go
262 lines (234 loc) · 6.96 KB
/
repo.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"path"
"path/filepath"
"sync"
)
// Repo holds what gitleaks needs to know about one repository under audit:
// how to reach it, where it lives on disk and where its report goes.
type Repo struct {
	// name labels the repo in log output and prefixes the report file name.
	name string
	// url is the remote handed to `git clone` when the repo is not on disk yet.
	url string
	// path is the local directory the repo is cloned into and audited from.
	path string
	// status is currently unused in this file.
	status string // TODO
	// leaks is not populated by the code in this file; audit collects its
	// findings in a local slice instead.
	leaks []Leak
	// reportPath is the directory writeReport places the JSON report in.
	reportPath string
}
// Leak is a single finding. Leaks are sent over the leak channel by
// auditDiff, collected by reportAggregator and serialized to JSON (using the
// keys in the struct tags) for verbose output and the report file.
// NOTE(review): the per-field meanings below are inferred from the field
// names and JSON keys; the code that fills them in is not in this file.
type Leak struct {
	Line     string `json:"line"`      // presumably the diff line that matched
	Commit   string `json:"commit"`    // presumably the hash of the offending commit
	Offender string `json:"string"`    // note: serialized under the key "string"
	Reason   string `json:"reason"`    // presumably which check flagged the line
	Msg      string `json:"commitMsg"` // note: serialized under the key "commitMsg"
	Time     string `json:"time"`      // presumably the commit date
	Author   string `json:"author"`    // presumably the commit author
	File     string `json:"file"`      // presumably the file the line belongs to
	RepoURL  string `json:"repoURL"`   // presumably the URL of the audited repo
}
// Commit is one entry of the `git rev-list` output as parsed by parseRevList:
// Hash comes from %H, Author from %an, Time from %ci and Msg from %s.
type Commit struct {
	Hash, Author, Time, Msg string
}
// newLocalRepo builds a Repo for a repository that already exists on disk at
// repoPath. The repo name is the last element of repoPath; the report
// directory is taken from opts.ReportPath. No url is set.
//
// Trailing separators are ignored and OS-specific separators are understood,
// so "/src/myrepo/" is named "myrepo" rather than "". An empty repoPath
// yields the name ".".
func newLocalRepo(repoPath string) *Repo {
	// path.Base works on slash-separated paths, so normalise separators first.
	name := path.Base(filepath.ToSlash(repoPath))
	return &Repo{
		name:       name,
		path:       repoPath,
		reportPath: opts.ReportPath,
	}
}
// newRepo builds a Repo from an explicit name, remote url and local path.
// The report directory is taken from opts.ReportPath at construction time.
func newRepo(name string, url string, path string) *Repo {
	return &Repo{
		name:       name,
		url:        url,
		path:       path,
		reportPath: opts.ReportPath,
	}
}
// rmTmp deletes the repo's local directory (repo.path) and everything in it.
// audit defers it when opts.Tmp is set.
//
// os.RemoveAll is required here: os.Remove refuses to delete a non-empty
// directory, which a cloned repository always is. A failure is logged rather
// than returned because the method has no error result.
func (repo *Repo) rmTmp() {
	log.Printf("removing tmp gitleaks repo %s\n", repo.path)
	if err := os.RemoveAll(repo.path); err != nil {
		log.Printf("could not remove tmp gitleaks repo %s: %v\n", repo.path, err)
	}
}
// audit operates on a single repo and searches the full or partial history of
// the repo. It returns true when at least one leak was found.
//
// Steps:
//  1. Clone the repo into repo.path if no .git directory exists there (an
//     error in opts.LocalMode), otherwise run `git fetch` in it.
//  2. List commits with `git rev-list` and start one auditDiff goroutine per
//     commit, stopping after the commit whose hash equals opts.SinceCommit.
//  3. Collect leaks through reportAggregator and, if opts.ReportPath is set
//     and leaks were found, write the report.
//
// A semaphore sized by opts.Concurrency is handed to every auditDiff so the
// number of simultaneous `git diff` processes stays bounded. If unbounded,
// the system will throw a `too many open files` error. Two WaitGroups are
// used: commitWG tracks the auditDiff goroutines, gitLeakReceiverWG tracks
// leaks that have been sent but not yet recorded by the aggregator.
//
// NOTE: audit changes the working directory of the whole process to
// repo.path. A relative repo.path is therefore resolved to an absolute path
// (and stored back on repo) before the first chdir, so that later chdir calls
// and the deferred rmTmp keep pointing at the same directory.
func (repo *Repo) audit() (bool, error) {
	var (
		out               []byte
		err               error
		commitWG          sync.WaitGroup
		gitLeakReceiverWG sync.WaitGroup
		gitLeaksChan      = make(chan Leak)
		leaks             []Leak
		semaphoreChan     = make(chan struct{}, opts.Concurrency)
		leaksPst          bool
	)

	if repo.path != "" && !filepath.IsAbs(repo.path) {
		absPath, absErr := filepath.Abs(repo.path)
		if absErr != nil {
			return false, fmt.Errorf("cannot resolve %s: %v", repo.path, absErr)
		}
		repo.path = absPath
	}

	if opts.Tmp {
		defer repo.rmTmp()
	}

	dotGitPath := filepath.Join(repo.path, ".git")

	// Navigate to proper location to begin audit. Clone repo
	// if not present, otherwise fetch for new changes.
	if _, statErr := os.Stat(dotGitPath); os.IsNotExist(statErr) {
		if opts.LocalMode {
			return false, fmt.Errorf("%s does not exist", repo.path)
		}
		// no repo present, clone it
		log.Printf("cloning \x1b[37;1m%s\x1b[0m into %s...\n", repo.url, repo.path)
		err = exec.Command("git", "clone", repo.url, repo.path).Run()
		if err != nil {
			return false, fmt.Errorf("cannot clone %s into %s: %v", repo.url, repo.path, err)
		}
	} else {
		log.Printf("fetching \x1b[37;1m%s\x1b[0m from %s ...\n", repo.name, repo.path)
		// Pass the path directly: using it as a Sprintf format string would
		// mangle any '%' it contains.
		err = os.Chdir(repo.path)
		if err != nil {
			return false, fmt.Errorf("cannot navigate to %s: %v", repo.path, err)
		}
		err = exec.Command("git", "fetch").Run()
		if err != nil {
			return false, fmt.Errorf("cannot fetch %s from %s: %v", repo.url, repo.path, err)
		}
	}

	err = os.Chdir(repo.path)
	if err != nil {
		return false, fmt.Errorf("cannot navigate to %s: %v", repo.path, err)
	}

	// Every commit is printed as a "commit <sha>" header followed by the four
	// lines requested here; parseRevList relies on that layout.
	gitFormat := "--format=%H%n%an%n%s%n%ci"
	out, err = exec.Command("git", "rev-list", "--all",
		"--remotes", "--topo-order", gitFormat).Output()
	if err != nil {
		return false, fmt.Errorf("could not retreive rev-list from %s: %v", repo.name, err)
	}

	revListLines := bytes.Split(out, []byte("\n"))
	commits := parseRevList(revListLines)

	for _, commit := range commits {
		if commit.Hash == "" {
			continue
		}

		commitWG.Add(1)
		go auditDiff(commit, repo, &commitWG, &gitLeakReceiverWG,
			semaphoreChan, gitLeaksChan)

		// SinceCommit itself is still audited; older commits are skipped.
		if commit.Hash == opts.SinceCommit {
			break
		}
	}

	go reportAggregator(&gitLeakReceiverWG, gitLeaksChan, &leaks)
	commitWG.Wait()
	// Every sender has returned, so the channel can be closed. This ends the
	// aggregator's range loop; without it that goroutine would block forever.
	close(gitLeaksChan)
	gitLeakReceiverWG.Wait()

	leaksPst = len(leaks) != 0
	if leaksPst {
		log.Printf("\x1b[31;2mLEAKS DETECTED for %s\x1b[0m!\n", repo.name)
	} else {
		log.Printf("No Leaks detected for \x1b[32;2m%s\x1b[0m\n", repo.name)
	}

	if opts.ReportPath != "" && len(leaks) != 0 {
		err = repo.writeReport(leaks)
		if err != nil {
			return leaksPst, fmt.Errorf("could not write report to %s: %v", opts.ReportPath, err)
		}
	}

	return leaksPst, nil
}
// writeReport is used by audit. It serializes leaks as tab-indented JSON and
// writes them to <repo.reportPath>/<repo.name>_leaks.json, creating the
// report directory (and any missing parents) first. With an empty
// repo.reportPath the file is written relative to the current working
// directory.
//
// It returns the first error encountered while encoding the leaks, creating
// the directory or writing the file. audit only calls it when
// --report-path is set and at least one leak was found.
func (repo *Repo) writeReport(leaks []Leak) error {
	reportJSON, err := json.MarshalIndent(leaks, "", "\t")
	if err != nil {
		return err
	}

	// Create the directory we are about to write into. MkdirAll is a no-op
	// when it already exists, so no Stat is needed beforehand.
	if repo.reportPath != "" {
		if err := os.MkdirAll(repo.reportPath, os.ModePerm); err != nil {
			return err
		}
	}

	reportFileName := fmt.Sprintf("%s_leaks.json", repo.name)
	reportFile := filepath.Join(repo.reportPath, reportFileName)
	if err := ioutil.WriteFile(reportFile, reportJSON, 0644); err != nil {
		return err
	}

	log.Printf("report for %s written to %s", repo.name, reportFile)
	return nil
}
// parseRevList is responsible for parsing the output of
// $ `git rev-list --all --remotes --topo-order --format=%H%n%an%n%s%n%ci`
// after it has been split into lines. Used by audit.
//
// Each commit occupies five consecutive lines:
//
//	commit <SHA>   (header line added by rev-list, skipped here)
//	SHA
//	Author Name
//	Commit Msg
//	Commit Date
//
// Only complete five-line records are converted. A trailing empty line or a
// truncated final record is ignored instead of causing an out-of-range panic.
// The result is nil when revList holds no complete record.
func parseRevList(revList [][]byte) []Commit {
	const linesPerCommit = 5

	count := len(revList) / linesPerCommit
	if count == 0 {
		return nil
	}

	commits := make([]Commit, 0, count)
	for i := 0; i+linesPerCommit <= len(revList); i += linesPerCommit {
		// revList[i] is the "commit <SHA>" header and carries nothing new.
		commits = append(commits, Commit{
			Hash:   string(revList[i+1]),
			Author: string(revList[i+2]),
			Msg:    string(revList[i+3]),
			Time:   string(revList[i+4]),
		})
	}
	return commits
}
// reportAggregator is meant to run as a goroutine. It drains gitLeaks until
// the channel is closed, appending every received leak to *leaks. When
// opts.Verbose is set each leak is also printed to stdout as indented JSON.
// gitLeakReceiverWG.Done is called once per leak after it has been recorded,
// matching the Add(1) the sender performs before sending.
func reportAggregator(gitLeakReceiverWG *sync.WaitGroup, gitLeaks chan Leak, leaks *[]Leak) {
	for {
		found, open := <-gitLeaks
		if !open {
			return
		}

		*leaks = append(*leaks, found)

		if opts.Verbose {
			encoded, encErr := json.MarshalIndent(found, "", " ")
			if encErr != nil {
				// handle this?
				fmt.Printf("failed to output leak: %v", encErr)
			}
			fmt.Println(string(encoded))
		}

		gitLeakReceiverWG.Done()
	}
}
// auditDiff is used by audit and is meant to run as a goroutine, one per
// commit. It runs `git diff <hash>^!` for currCommit inside repo.path, passes
// the diff text to doChecks and sends every resulting leak on gitLeaks.
//
// Synchronisation:
//   - commitWG.Done is called when the function returns.
//   - semaphoreChan is held only while the `git diff` process runs, which
//     bounds the number of concurrent git processes.
//   - gitLeakReceiverWG.Add(1) is called before each leak is sent; the
//     receiver is expected to call Done once it has recorded the leak.
//
// There is no error channel yet (TODO): if the working directory cannot be
// changed or the diff fails, the reason is logged and the whole process exits
// with ExitFailure.
func auditDiff(currCommit Commit, repo *Repo, commitWG *sync.WaitGroup,
	gitLeakReceiverWG *sync.WaitGroup, semaphoreChan chan struct{},
	gitLeaks chan Leak) {
	// signal to WG this diff is done being audited
	defer commitWG.Done()

	// Pass the path directly: using it as a Sprintf format string would
	// mangle any '%' it contains.
	if err := os.Chdir(repo.path); err != nil {
		// TODO handle this better
		log.Printf("cannot navigate to %s: %v\n", repo.path, err)
		os.Exit(ExitFailure)
	}

	// "<hash>^!" selects the commit itself while excluding its parents.
	commitCmp := fmt.Sprintf("%s^!", currCommit.Hash)

	semaphoreChan <- struct{}{}
	out, err := exec.Command("git", "diff", commitCmp).Output()
	<-semaphoreChan

	if err != nil {
		log.Printf("cannot diff commit %s of %s: %v\n", currCommit.Hash, repo.name, err)
		os.Exit(ExitFailure)
	}

	for _, leak := range doChecks(string(out), currCommit, repo) {
		gitLeakReceiverWG.Add(1)
		gitLeaks <- leak
	}
}