Skip to content

Commit

Permalink
feat(threat): add Verify datasets function (#165)
Browse files Browse the repository at this point in the history
* feat(threat): concat w/o fmt

Signed-off-by: Dwi Siswanto <me@dw1.io>

* feat(threat): add Verify func to checks datasets integrity

Signed-off-by: Dwi Siswanto <me@dw1.io>

* feat(threat): add license header

Signed-off-by: Dwi Siswanto <me@dw1.io>

* refactor(threat): update corrupted term to malformed

Signed-off-by: Dwi Siswanto <me@dw1.io>

* feat(teler): implement Verify datasets (checksum)

Signed-off-by: Dwi Siswanto <me@dw1.io>

---------

Signed-off-by: Dwi Siswanto <me@dw1.io>
  • Loading branch information
dwisiswant0 committed Feb 26, 2024
1 parent 941152d commit 99e86bb
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 5 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ go 1.19
require (
github.com/antonmedv/expr v1.12.7
github.com/bitfield/script v0.22.0
github.com/codingsince1985/checksum v1.3.0
github.com/daniel-hutao/spinlock v0.1.0
github.com/dwisiswant0/clientip v0.3.0
github.com/go-playground/validator/v10 v10.16.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWH
github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
github.com/codingsince1985/checksum v1.3.0 h1:kqqIqWBwjidGmt/pO4yXCEX+np7HACGx72EB+MkKcVY=
github.com/codingsince1985/checksum v1.3.0/go.mod h1:QfRskdtdWap+gJil8e5obw6I8/cWJ0SwMUACruWDSU8=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
github.com/daniel-hutao/spinlock v0.1.0 h1:qk6v2L6mJLUmxzq1eJ5xUIlCh4q0wM+26Qy/KfH5c3U=
Expand Down
18 changes: 18 additions & 0 deletions teler.go
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,24 @@ func (t *Teler) getResources() error {
updated = false
}

// Do checksum for threat datasets
if updated {
t.log.Debug("verifying datasets")
verify, err := threat.Verify()
if err != nil {
// An error occurred while verifying
updated = false
}

// Checks if datasets is malformed/corrupted
//
// If not verified, err is definitely not nil.
if !verify {
t.log.Debug(err.Error())
updated = false
}
}

// Download the datasets of threat ruleset from teler-resources
// if threat datasets is not up-to-date, update check is disabled
// and in-memory option is true
Expand Down
6 changes: 5 additions & 1 deletion threat/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,9 @@
package threat

const (
errFilepath = "unable to get file path location of given %s threat type"
errFilepath = "unable to get file path location of given %s threat type"
errGetSumFile = "unable to fetch checksum file: %w"
errReadSumFile = "cannot read checksum file: %w"
errChecksum = "cannot perform checksum for '%s' file: %w"
errMalformed = "threat '%s' datasets is malformed, expect '%s' got '%s' sum"
)
8 changes: 4 additions & 4 deletions threat/var.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

package threat

import "fmt"

var (
DbURL = fmt.Sprintf("%s/raw/master/db/db.tar.zst", repoURL)
dbQuery = fmt.Sprintf("checksum=file:%s/raw/master/db/MD5SUMS", repoURL)
dbFile = "db.tar.zst"
DbURL = repoURL + "/raw/master/db/" + dbFile
sumURL = repoURL + "/raw/master/db/MD5SUMS"
dbQuery = "checksum=file:" + sumURL
)

var str = map[Threat]string{
Expand Down
96 changes: 96 additions & 0 deletions threat/verify.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Licensed to Dwi Siswanto under one or more agreements.
// Dwi Siswanto licenses this file to you under the Apache 2.0 License.
// See the LICENSE-APACHE file in the project root for more information.

package threat

import (
	"bufio"
	"fmt"
	"net/http"
	"path/filepath"
	"strings"
	"time"

	"github.com/codingsince1985/checksum"
)

// Verify reports whether the local threat datasets match the published
// MD5 sums.
//
// The sums are obtained through fetchMD5Sums and then handed to
// verifyChecksums, whose result is returned as-is. If the sums cannot be
// fetched, Verify returns false together with the fetch error and the local
// files are not inspected at all.
func Verify() (bool, error) {
	sums, err := fetchMD5Sums()
	if err == nil {
		return verifyChecksums(sums)
	}

	return false, err
}

// fetchMD5Sums downloads the checksum file located at sumURL and returns its
// entries as a map keyed by filename, with the MD5 sum as the value.
//
// Every line is expected to consist of exactly two whitespace-separated
// fields: the sum followed by the filename. Lines of any other shape are
// skipped, and so is the entry whose filename equals dbFile.
//
// A nil map and a non-nil error are returned when the request fails, when the
// server replies with a status other than 200 OK, or when the response body
// cannot be read completely.
func fetchMD5Sums() (map[string]string, error) {
	// Bound the whole request: the default client used by http.Get has no
	// timeout, so an unresponsive host would block the caller forever.
	client := &http.Client{Timeout: 30 * time.Second}

	resp, err := client.Get(sumURL)
	if err != nil {
		return nil, fmt.Errorf(errGetSumFile, err)
	}
	defer resp.Body.Close()

	// A non-200 reply carries an error page, not checksums. Parsing it would
	// yield an empty or bogus map and a misleading mismatch error later on.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf(errGetSumFile, fmt.Errorf("unexpected HTTP status %q", resp.Status))
	}

	md5sums := make(map[string]string)

	// Read the response body line by line.
	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		parts := strings.Fields(scanner.Text())
		if len(parts) != 2 {
			continue
		}

		sum, filename := parts[0], parts[1]
		if filename == dbFile {
			continue
		}

		md5sums[filename] = sum
	}

	// The scanner stops silently on read errors; surface them here.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf(errReadSumFile, err)
	}

	return md5sums, nil
}

// verifyChecksums checks every entry returned by List() against the given
// filename-to-MD5 map.
//
// For each entry it resolves a file path via Filename(true), computes the MD5
// sum of that file, and compares it with the sum stored under the file's base
// name. The first failure ends the check: a path lookup error is returned
// unchanged, a hashing error is wrapped with errChecksum, and a mismatch is
// reported with errMalformed. A base name absent from md5sums is compared
// against the empty string and therefore counts as a mismatch.
//
// It returns true with a nil error only when every file matched.
func verifyChecksums(md5sums map[string]string) (bool, error) {
	for _, kind := range List() {
		path, err := kind.Filename(true)
		if err != nil {
			return false, err
		}

		actual, err := checksum.MD5sum(path)
		if err != nil {
			return false, fmt.Errorf(errChecksum, path, err)
		}

		// The checksum map is keyed by bare filename, not by full path.
		expected := md5sums[filepath.Base(path)]
		if actual == expected {
			continue
		}

		return false, fmt.Errorf(errMalformed, kind.String(), expected, actual)
	}

	return true, nil
}

2 comments on commit 99e86bb

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 2.

Benchmark suite Current: 99e86bb Previous: fc389f7 Ratio
BenchmarkInitializeWithoutCVE 62296184 ns/op 44603144 B/op 98679 allocs/op 29574100 ns/op 43949236 B/op 97946 allocs/op 2.11
BenchmarkInitializeWithoutCVE - ns/op 62296184 ns/op 29574100 ns/op 2.11

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 2.

Benchmark suite Current: 99e86bb Previous: fc389f7 Ratio
BenchmarkInitializeCommonWebAttack 57596321 ns/op 44604459 B/op 98683 allocs/op 28559147 ns/op 43950038 B/op 97947 allocs/op 2.02
BenchmarkInitializeCommonWebAttack - ns/op 57596321 ns/op 28559147 ns/op 2.02
BenchmarkInitializeCVE 58119357 ns/op 44603451 B/op 98681 allocs/op 28822842 ns/op 43949636 B/op 97946 allocs/op 2.02
BenchmarkInitializeCVE - ns/op 58119357 ns/op 28822842 ns/op 2.02
BenchmarkInitializeBadCrawler 59175319 ns/op 44602494 B/op 98680 allocs/op 28862908 ns/op 43949102 B/op 97944 allocs/op 2.05
BenchmarkInitializeBadCrawler - ns/op 59175319 ns/op 28862908 ns/op 2.05
BenchmarkInitializeDirectoryBruteforce 63343379 ns/op 44603967 B/op 98682 allocs/op 29612099 ns/op 43949703 B/op 97946 allocs/op 2.14
BenchmarkInitializeDirectoryBruteforce - ns/op 63343379 ns/op 29612099 ns/op 2.14
BenchmarkInitializeWithoutCommonWebAttack 59831339 ns/op 44602532 B/op 98677 allocs/op 29619594 ns/op 43949312 B/op 97945 allocs/op 2.02
BenchmarkInitializeWithoutCommonWebAttack - ns/op 59831339 ns/op 29619594 ns/op 2.02
BenchmarkInitializeWithoutCVE 60423723 ns/op 44601956 B/op 98678 allocs/op 29574100 ns/op 43949236 B/op 97946 allocs/op 2.04
BenchmarkInitializeWithoutCVE - ns/op 60423723 ns/op 29574100 ns/op 2.04
BenchmarkInitializeWithoutDirectoryBruteforce 58810024 ns/op 44604298 B/op 98681 allocs/op 29404007 ns/op 43949140 B/op 97944 allocs/op 2.00
BenchmarkInitializeWithoutDirectoryBruteforce - ns/op 58810024 ns/op 29404007 ns/op 2.00

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.