-
Notifications
You must be signed in to change notification settings - Fork 1
/
matcher.go
101 lines (91 loc) · 2.71 KB
/
matcher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
package imgmatch
import (
"fmt"
"image"
"image/jpeg"
"os"
"path/filepath"
"strings"
"sync"
"github.com/corona10/goimagehash"
"github.com/sirupsen/logrus"
)
// NewMatcher builds a Matcher with the given distance thresholds and seeds it
// with every sample image found directly inside samplesDir.
//
// Entries whose name starts with "." and sub-directories are skipped. Every
// remaining entry is decoded as JPEG and passed to AddSample under its file
// name; the "added" result is discarded, so a sample that AddSample declines
// to store is not treated as an error.
//
// An error is returned (and no Matcher) if the directory cannot be listed or
// if any sample cannot be opened, decoded, or hashed.
func NewMatcher(samplesDir string, interestingThreshold, suspiciousThreshold int) (*Matcher, error) {
	m := &Matcher{
		interestingSampleThreshold: interestingThreshold,
		suspiciousSampleThreshold:  suspiciousThreshold,
		samples:                    make(map[string]*goimagehash.ImageHash),
	}
	entries, err := os.ReadDir(samplesDir)
	if err != nil {
		return nil, fmt.Errorf("listing samples directory: %w", err)
	}
	for _, entry := range entries {
		if strings.HasPrefix(entry.Name(), ".") || entry.IsDir() {
			continue
		}
		fullPath := filepath.Join(samplesDir, entry.Name())
		// Decoding goes through a helper so that each file is closed before
		// the next one is opened; a defer placed directly in this loop would
		// keep every file open until NewMatcher returns.
		img, err := decodeJPEGFile(fullPath)
		if err != nil {
			return nil, err
		}
		if _, err := m.AddSample(entry.Name(), img); err != nil {
			return nil, fmt.Errorf("adding sample %s: %w", fullPath, err)
		}
	}
	logrus.Debugf("Loaded %v spam image samples", len(m.samples))
	return m, nil
}

// decodeJPEGFile opens the file at path, decodes it as a JPEG image, and
// closes the file before returning.
func decodeJPEGFile(path string) (image.Image, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, fmt.Errorf("opening %s: %w", path, err)
	}
	// The handle is read-only, so a failing Close cannot lose data; its error
	// is intentionally ignored.
	defer file.Close()

	img, err := jpeg.Decode(file)
	if err != nil {
		return nil, fmt.Errorf("decoding %s as jpeg: %w", path, err)
	}
	return img, nil
}
// Matcher keeps a set of named image hashes and compares new images against
// them by hash distance.
type Matcher struct {
	// interestingSampleThreshold is the maximum distance at which AddSample
	// treats a new image as matching an already stored sample.
	// suspiciousSampleThreshold is the maximum distance at which CheckSample
	// reports a match.
	interestingSampleThreshold, suspiciousSampleThreshold int

	// mu is held for writing by AddSample and for reading by CheckSample
	// while they access samples, which maps a sample name to its hash.
	mu      sync.RWMutex
	samples map[string]*goimagehash.ImageHash
}
// AddSample hashes img and stores the hash under name, unless an already
// stored sample lies within interestingSampleThreshold of it.
//
// It returns added=true when the hash was stored, and added=false with a nil
// error when a stored sample was close enough for the new one to be skipped.
// A non-nil error means the hash or a distance could not be computed; nothing
// is stored in that case.
func (m *Matcher) AddSample(name string, img image.Image) (added bool, err error) {
	sampleHash, hashErr := goimagehash.PerceptionHash(img)
	if hashErr != nil {
		return false, fmt.Errorf("calculating hash: %w", hashErr)
	}

	entry := logrus.WithField("image_sample", name)
	entry.Debugf("Got new sample with hash %v", sampleHash.ToString())

	// The write lock covers both the scan and the insert so the two happen
	// as one step with respect to other AddSample/CheckSample calls.
	m.mu.Lock()
	defer m.mu.Unlock()

	for knownName, knownHash := range m.samples {
		distance, distErr := sampleHash.Distance(knownHash)
		switch {
		case distErr != nil:
			return false, fmt.Errorf("calculating distance to hash %s: %w", knownName, distErr)
		case distance <= m.interestingSampleThreshold:
			entry.Debugf("New sample matches %s with distance %d", knownName, distance)
			return false, nil
		}
	}

	m.samples[name] = sampleHash
	return true, nil
}
// CheckSample hashes img and reports whether any stored sample lies within
// suspiciousSampleThreshold of it.
//
// It returns match=true on the first stored sample that is close enough, and
// match=false with a nil error when none is. A non-nil error means the hash
// or a distance could not be computed.
func (m *Matcher) CheckSample(img image.Image) (match bool, err error) {
	candidate, hashErr := goimagehash.PerceptionHash(img)
	if hashErr != nil {
		return false, fmt.Errorf("calculating hash: %w", hashErr)
	}

	entry := logrus.WithField("sample_hash", candidate.ToString())
	entry.Debugf("Checking sample")

	// Only reads of the sample set happen here, so the shared lock suffices.
	m.mu.RLock()
	defer m.mu.RUnlock()

	for knownName, knownHash := range m.samples {
		distance, distErr := candidate.Distance(knownHash)
		switch {
		case distErr != nil:
			return false, fmt.Errorf("calculating distance to hash %s: %w", knownName, distErr)
		case distance <= m.suspiciousSampleThreshold:
			entry.Debugf("Sample matches %s with dist %v", knownName, distance)
			return true, nil
		}
	}

	return false, nil
}