-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pigo.go
333 lines (285 loc) · 9.36 KB
/
pigo.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
package pigo
import (
"bytes"
"encoding/binary"
"math"
"sort"
"unsafe"
)
// CascadeParams bundles the settings that drive a cascade run over an image.
//
// MinSize and MaxSize bound the size of the detection window: RunCascade
// starts at MinSize and stops once the window is larger than MaxSize.
// ShiftFactor is the fraction of the current window size by which the window
// is moved between two evaluations. ScaleFactor is the multiplier applied to
// the window size when moving on to the next scale. The embedded ImageParams
// carries the image that is analyzed.
type CascadeParams struct {
	MinSize, MaxSize         int
	ShiftFactor, ScaleFactor float64
	ImageParams
}
// ImageParams describes the image handed to the detector.
//
// Pixels holds the grayscale pixel data, Rows and Cols are the number of
// image rows and columns, and Dim is the row stride used by the classifier
// when it indexes into Pixels (row*Dim + column).
type ImageParams struct {
	Pixels          []uint8
	Rows, Cols, Dim int
}
// Pigo holds a decoded cascade of binary decision trees.
//
// treeDepth is the depth shared by all trees and treeNum the number of trees.
// treeCodes stores four signed codes per tree node (the pixel offsets that
// are compared), treePred the predictions of the leaf nodes and
// treeThreshold one threshold per tree.
type Pigo struct {
	treeDepth, treeNum      uint32
	treeCodes               []int8
	treePred, treeThreshold []float32
}
// NewPigo returns a pointer to a freshly allocated, zero-valued Pigo.
// The result holds no cascade data; call Unpack on it to obtain a Pigo that
// is loaded with a decoded cascade.
func NewPigo() *Pigo {
	return new(Pigo)
}
// Unpack decodes a binary face classification cascade and returns a new Pigo
// populated with its trees. The receiver is neither read nor modified.
//
// Layout of packet as consumed here (multi-byte values are little-endian):
//   - 8 leading bytes that are skipped,
//   - the tree depth (uint32) followed by the number of trees (uint32),
//   - for every tree: 4*2^depth-4 signed node codes, 2^depth float32 leaf
//     predictions and one float32 threshold.
//
// A non-nil error is returned, instead of panicking, when packet is too short
// for the sizes announced in its header or when the announced tree depth is
// unreasonably large.
func (pg *Pigo) Unpack(packet []byte) (*Pigo, error) {
	const (
		headerSize = 8 // leading bytes of the cascade file that are skipped
		// A tree of depth 31 would already need 16 GiB of data; rejecting
		// deeper trees also keeps the size arithmetic below free of overflow.
		maxDepth = 30
	)

	if len(packet) < headerSize+8 {
		return nil, unpackError("pigo: cascade data too short to hold the tree depth and tree count")
	}
	pos := headerSize
	treeDepth := binary.LittleEndian.Uint32(packet[pos:])
	pos += 4
	treeNum := binary.LittleEndian.Uint32(packet[pos:])
	pos += 4

	if treeDepth > maxDepth {
		return nil, unpackError("pigo: cascade tree depth is too large")
	}

	// One tree occupies (4*leaves-4) code bytes + 4*leaves prediction bytes
	// + 4 threshold bytes = 8*leaves bytes. Validate the announced total
	// against the data that is actually present before touching or
	// allocating anything, so corrupt headers cannot cause a panic or a
	// huge allocation.
	leaves := uint64(1) << treeDepth
	treeSize := 8 * leaves
	if uint64(treeNum) > uint64(len(packet)-pos)/treeSize {
		return nil, unpackError("pigo: cascade data is truncated")
	}
	if treeNum == 0 {
		return &Pigo{treeDepth: treeDepth}, nil
	}

	// The check above bounds every size below by len(packet), so the
	// conversions to int cannot overflow.
	var (
		n       = int(treeNum)
		nLeaves = int(leaves)
		codeLen = 4*nLeaves - 4
		predLen = 4 * nLeaves

		treeCodes     = make([]int8, 0, n*4*nLeaves)
		treePred      = make([]float32, n*nLeaves)
		treeThreshold = make([]float32, 0, n)
	)

	for t := 0; t < n; t++ {
		// Every tree is prefixed with four zero codes, so that a node
		// numbered idx (counting from 1) finds its four codes at offset
		// 4*idx from the start of the tree.
		treeCodes = append(treeCodes, 0, 0, 0, 0)

		// Reinterpret the unsigned bytes as signed codes; append copies
		// them, so the result does not alias packet.
		code := packet[pos : pos+codeLen]
		treeCodes = append(treeCodes, *(*[]int8)(unsafe.Pointer(&code))...)
		pos += codeLen

		// Decode all leaf predictions of this tree in one call, directly
		// into their final position.
		preds := treePred[t*nLeaves : (t+1)*nLeaves]
		if err := binary.Read(bytes.NewReader(packet[pos:pos+predLen]), binary.LittleEndian, preds); err != nil {
			return nil, err
		}
		pos += predLen

		// Read the threshold of this tree.
		threshold := math.Float32frombits(binary.LittleEndian.Uint32(packet[pos:]))
		treeThreshold = append(treeThreshold, threshold)
		pos += 4
	}

	return &Pigo{
		treeDepth:     treeDepth,
		treeNum:       treeNum,
		treeCodes:     treeCodes,
		treePred:      treePred,
		treeThreshold: treeThreshold,
	}, nil
}

// unpackError is the error type returned by Unpack for malformed cascade data.
type unpackError string

// Error implements the error interface.
func (e unpackError) Error() string { return string(e) }
// classifyRegion runs the decision-tree cascade on the window centered at
// row r, column c with size s and returns its detection score.
//
// pixels is indexed as row*dim+col. Every tree node stores four signed codes
// (two row/column offset pairs). An offset is scaled by s/256 and added to
// the window center, and the two sampled pixels are compared to select the
// child node. The prediction of the leaf reached in each tree is
// accumulated; as soon as the running sum is not above that tree's threshold
// the region is rejected and -1 is returned. Otherwise the result is the
// accumulated score minus the threshold of the last tree. A classifier that
// holds no trees yields 0 instead of panicking.
//
// No clamping is performed on the sampled coordinates, so the caller has to
// keep the window inside the image.
func (pg *Pigo) classifyRegion(r, c, s int, pixels []uint8, dim int) float32 {
	// Without trees there is no final threshold to subtract; report a
	// neutral score, which callers treat as "no detection".
	if pg.treeNum == 0 {
		return 0
	}

	var (
		root  int         // offset of the current tree inside treeCodes
		out   float32     // accumulated leaf predictions
		pTree = 1 << pg.treeDepth // number of leaves per tree
	)

	// Work in 24.8 fixed point so the scaled offsets can be added directly.
	r *= 256
	c *= 256

	for i := 0; i < int(pg.treeNum); i++ {
		idx := 1
		for j := 0; j < int(pg.treeDepth); j++ {
			node := pg.treeCodes[root+4*idx : root+4*idx+4]
			x1 := ((r+int(node[0])*s)>>8)*dim + ((c + int(node[1])*s) >> 8)
			x2 := ((r+int(node[2])*s)>>8)*dim + ((c + int(node[3])*s) >> 8)

			// Descend to the right child when the first pixel is not
			// brighter than the second one, to the left child otherwise.
			idx *= 2
			if pixels[x1] <= pixels[x2] {
				idx++
			}
		}
		out += pg.treePred[pTree*i+idx-pTree]

		if out <= pg.treeThreshold[i] {
			return -1.0
		}
		root += 4 * pTree
	}
	return out - pg.treeThreshold[pg.treeNum-1]
}
// qCosTable and qSinTable hold 256*cos and 256*sin for 33 evenly spaced
// angles covering one full turn (entry k corresponds to k/32 of a turn).
var (
	qCosTable = [...]int{256, 251, 236, 212, 181, 142, 97, 49, 0, -49, -97, -142, -181, -212, -236, -251, -256, -251, -236, -212, -181, -142, -97, -49, 0, 49, 97, 142, 181, 212, 236, 251, 256}
	qSinTable = [...]int{0, 49, 97, 142, 181, 212, 236, 251, 256, 251, 236, 212, 181, 142, 97, 49, 0, -49, -97, -142, -181, -212, -236, -251, -256, -251, -236, -212, -181, -142, -97, -49, 0}
)

// classifyRotatedRegion runs the decision-tree cascade on the window centered
// at row r, column c with size s, with the sampling pattern rotated by a.
//
// a is the rotation expressed as a fraction of a full turn and must lie in
// [0, 1]; it is quantized to one of 33 table entries (index int(32*a)).
// nrows and ncols are the image bounds and pixels is indexed as row*dim+col.
//
// The function returns -1 when the rotated window could leave the image or
// when a tree rejects the region (the accumulated leaf predictions are not
// above that tree's threshold). Otherwise it returns the accumulated score
// minus the threshold of the last tree. A classifier that holds no trees
// yields 0 instead of panicking.
func (pg *Pigo) classifyRotatedRegion(r, c, s int, a float64, nrows, ncols int, pixels []uint8, dim int) float32 {
	var (
		root  int         // offset of the current tree inside treeCodes
		out   float32     // accumulated leaf predictions
		pTree = 1 << pg.treeDepth // number of leaves per tree
	)

	// Work in 16.16 fixed point: the codes are scaled by s/256 and the
	// table entries carry another factor of 256.
	r *= 65536
	c *= 65536

	k := int(32.0 * a)
	qsin := s * qSinTable[k] // s*(256.0*math.Sin(2*math.Pi*a))
	qcos := s * qCosTable[k] // s*(256.0*math.Cos(2*math.Pi*a))

	// 46341/65536 is about sqrt(2)/2, i.e. the check covers the half
	// diagonal of the window, the farthest a rotated sample can reach.
	if (r+46341*s)/65536 >= nrows || (r-46341*s)/65536 < 0 || (c+46341*s)/65536 >= ncols || (c-46341*s)/65536 < 0 {
		return -1
	}

	// Without trees there is no final threshold to subtract; report a
	// neutral score, which callers treat as "no detection".
	if pg.treeNum == 0 {
		return out
	}

	for i := 0; i < int(pg.treeNum); i++ {
		idx := 1
		for j := 0; j < int(pg.treeDepth); j++ {
			node := pg.treeCodes[root+4*idx : root+4*idx+4]
			dr1, dc1 := int(node[0]), int(node[1])
			dr2, dc2 := int(node[2]), int(node[3])

			// Rotate both offset pairs around the window center.
			r1 := abs(r+qcos*dr1-qsin*dc1) >> 16
			c1 := abs(c+qsin*dr1+qcos*dc1) >> 16
			r2 := abs(r+qcos*dr2-qsin*dc2) >> 16
			c2 := abs(c+qsin*dr2+qcos*dc2) >> 16

			// Descend to the right child when the first pixel is not
			// brighter than the second one, to the left child otherwise.
			idx *= 2
			if pixels[r1*dim+c1] <= pixels[r2*dim+c2] {
				idx++
			}
		}
		out += pg.treePred[pTree*i+idx-pTree]

		if out <= pg.treeThreshold[i] {
			return -1.0
		}
		root += 4 * pTree
	}
	return out - pg.treeThreshold[pg.treeNum-1]
}
// Detection is a single detection result: Row and Col locate the detection
// window, Scale is the size of that window and Q is the detection score
// returned by the classifier.
type Detection struct {
	Row, Col, Scale int
	Q               float32
}
// RunCascade slides a detection window over the grayscale image described by
// cp and runs the classifier at every position and scale.
//
// The window size starts at cp.MinSize (at least 1) and is multiplied by
// cp.ScaleFactor after every pass, growing by at least one pixel, until it
// exceeds cp.MaxSize. Within a pass the window is moved in steps of
// cp.ShiftFactor times the window size (at least one pixel). When angle is
// greater than 0 the rotated classifier is used, with angle capped at 1.0;
// otherwise the plain classifier is used.
//
// The returned slice holds one Detection (row, column, window size, score)
// for every evaluated window whose score is greater than 0. It is nil when
// nothing was detected.
func (pg *Pigo) RunCascade(cp CascadeParams, angle float64) []Detection {
	var detections []Detection

	// The angle never changes during the scan, so decide once which
	// classifier to use and clamp it once.
	rotated := angle > 0.0
	if angle > 1.0 {
		angle = 1.0
	}

	// A window smaller than one pixel is meaningless and would never grow
	// when multiplied by the scale factor.
	scale := cp.MinSize
	if scale < 1 {
		scale = 1
	}

	for scale <= cp.MaxSize {
		step := int(math.Max(cp.ShiftFactor*float64(scale), 1))
		if step < 1 {
			// Guards against a non-finite shift factor, whose integer
			// conversion is not guaranteed to be positive.
			step = 1
		}
		offset := scale/2 + 1

		for row := offset; row <= cp.Rows-offset; row += step {
			for col := offset; col <= cp.Cols-offset; col += step {
				var q float32
				if rotated {
					q = pg.classifyRotatedRegion(row, col, scale, angle, cp.Rows, cp.Cols, cp.Pixels, cp.Dim)
				} else {
					q = pg.classifyRegion(row, col, scale, cp.Pixels, cp.Dim)
				}
				if q > 0.0 {
					detections = append(detections, Detection{row, col, scale, q})
				}
			}
		}

		// Truncation can leave the size unchanged (e.g. int(5*1.1) == 5),
		// as can a scale factor <= 1; always advance so the loop ends.
		next := int(float64(scale) * cp.ScaleFactor)
		if next <= scale {
			next = scale + 1
		}
		scale = next
	}
	return detections
}
// ClusterDetections merges overlapping detections into clusters and returns
// one averaged Detection per cluster.
//
// The detections slice is first sorted in place by ascending score. Then
// every detection that has not yet been placed in a cluster seeds a new one:
// all detections whose intersection over union with the seed exceeds
// iouThreshold are marked as placed and merged into it. A cluster reports
// the integer average of its members' rows, columns and scales, and the sum
// of their scores. The result is always non-nil, even when it is empty.
func (pg *Pigo) ClusterDetections(detections []Detection, iouThreshold float64) []Detection {
	// Order the detections by their score.
	sort.Sort(det(detections))

	// iou treats both detections as squares of side Scale centered at
	// (Row, Col) and returns their intersection over union.
	iou := func(a, b Detection) float64 {
		ra, ca, sa := float64(a.Row), float64(a.Col), float64(a.Scale)
		rb, cb, sb := float64(b.Row), float64(b.Col), float64(b.Scale)

		overRow := math.Max(0, math.Min(ra+sa/2, rb+sb/2)-math.Max(ra-sa/2, rb-sb/2))
		overCol := math.Max(0, math.Min(ca+sa/2, cb+sb/2)-math.Max(ca-sa/2, cb-sb/2))

		return overRow * overCol / (sa*sa + sb*sb - overRow*overCol)
	}

	merged := []Detection{}
	placed := make([]bool, len(detections))

	for i := range detections {
		// A detection already absorbed by a cluster does not seed another.
		if placed[i] {
			continue
		}

		var (
			rowSum, colSum, scaleSum, members int
			score                             float32
		)
		seed := detections[i]
		for j, candidate := range detections {
			// Merge every detection that overlaps the seed strongly enough.
			if iou(seed, candidate) > iouThreshold {
				placed[j] = true
				rowSum += candidate.Row
				colSum += candidate.Col
				scaleSum += candidate.Scale
				score += candidate.Q
				members++
			}
		}

		if members > 0 {
			merged = append(merged, Detection{rowSum / members, colSum / members, scaleSum / members, score})
		}
	}
	return merged
}
// det adapts a slice of detections to sort.Interface, ordering the
// detections by ascending score Q.
type det []Detection

// Len returns the number of detections.
func (d det) Len() int {
	return len(d)
}

// Swap exchanges the detections at positions i and j.
func (d det) Swap(i, j int) {
	d[i], d[j] = d[j], d[i]
}

// Less reports whether the detection at position i has a strictly lower
// score than the one at position j.
func (d det) Less(i, j int) bool {
	return d[i].Q < d[j].Q
}
// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}