-
Notifications
You must be signed in to change notification settings - Fork 1
/
extractor.go
268 lines (209 loc) · 6.13 KB
/
extractor.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
package rpcsrv
import (
"bytes"
"github.com/sigex-kz/ddc"
)
// Extractor can be exported via net/rpc and used to extract embedded files from a DDC.
// Its methods all take an args pointer and a resp pointer and return an error,
// which is the method shape net/rpc expects.
type Extractor int
// ExtractorRegisterArgs is used to pass data to Extractor.Register.
// It has no fields: registering a slot requires no input.
type ExtractorRegisterArgs struct {
}
// ExtractorRegisterResp is used to retrieve data from Extractor.Register.
type ExtractorRegisterResp struct {
// Error is not "" if any error occurred during the operation.
Error string
// ID of the new extractor slot; pass it to the other Extractor methods
// to address this slot.
ID string
}
// Register creates a new extractor slot and reports its ID in resp.ID.
//
// args carries no data. The returned error is always nil.
func (t *Extractor) Register(args *ExtractorRegisterArgs, resp *ExtractorRegisterResp) error {
	// The slot starts out with an empty (zero-valued) extractor entry.
	entry := new(extractorEntry)
	resp.ID = newStoreEntry(nil, entry)

	return nil
}
// ExtractorAppendDDCPartArgs is used to pass data to Extractor.AppendDDCPart.
type ExtractorAppendDDCPartArgs struct {
// ID of the extractor slot to use.
ID string
// Part of the DDC; it is appended to the bytes already stored in the slot.
Part []byte
}
// ExtractorAppendDDCPartResp is used to retrieve data from Extractor.AppendDDCPart.
type ExtractorAppendDDCPartResp struct {
// Error is not "" if any error occurred during the operation.
Error string
}
// AppendDDCPart appends args.Part to the DDC buffer of the extractor slot
// identified by args.ID.
//
// Failures (slot lookup error, slot without an extractor entry, write error)
// are reported through resp.Error; the returned error is always nil.
func (t *Extractor) AppendDDCPart(args *ExtractorAppendDDCPartArgs, resp *ExtractorAppendDDCPartResp) error {
	entry, lookupErr := getStoreEntry(args.ID)
	if lookupErr != nil {
		resp.Error = lookupErr.Error()
		return nil
	}

	// Hold the slot's mutex for the rest of the call.
	entry.mutex.Lock()
	defer entry.mutex.Unlock()

	// The store entry exists but holds no extractor entry.
	if entry.ee == nil {
		resp.Error = "unknown id"
		return nil
	}

	if _, writeErr := entry.ee.ddcFileBuffer.Write(args.Part); writeErr != nil {
		resp.Error = writeErr.Error()
	}

	return nil
}
// ExtractorParseArgs is used to pass data to Extractor.Parse.
type ExtractorParseArgs struct {
// ID of the extractor slot to use.
ID string
}
// ExtractorParseResp is used to retrieve data from Extractor.Parse.
type ExtractorParseResp struct {
// Error is not "" if any error occurred during the operation.
Error string
// DocumentFileName is the name of the original document extracted from the DDC.
DocumentFileName string
}
// Parse the DDC in the specified slot. It should be called after all parts of
// the DDC have been transmitted via AppendDDCPart.
//
// The accumulated DDC bytes, the extracted original document and every
// extracted signature are each passed to clamAVScan; the first non-nil error
// aborts the operation. On success the extracted document and signatures are
// stored in the slot and resp.DocumentFileName is set to the document's name.
//
// Failures are reported through resp.Error; the returned error is always nil.
func (t *Extractor) Parse(args *ExtractorParseArgs, resp *ExtractorParseResp) error {
	e, err := getStoreEntry(args.ID)
	if err != nil {
		resp.Error = err.Error()
		return nil
	}

	e.mutex.Lock()
	defer e.mutex.Unlock()

	// This check must precede every access to e.ee: a store entry without an
	// extractor entry would otherwise cause a nil pointer dereference below.
	if e.ee == nil {
		resp.Error = "unknown id"
		return nil
	}

	// The slot's mutex is held, so the buffer cannot change between the scan
	// and the extraction.
	ddcBytes := e.ee.ddcFileBuffer.Bytes()

	err = clamAVScan(ddcBytes)
	if err != nil {
		resp.Error = err.Error()
		return nil
	}

	documentOriginal, signatures, err := ddc.ExtractAttachments(bytes.NewReader(ddcBytes))
	if err != nil {
		resp.Error = err.Error()
		return nil
	}

	err = clamAVScan(documentOriginal.Bytes)
	if err != nil {
		resp.Error = err.Error()
		return nil
	}

	for _, s := range signatures {
		err = clamAVScan(s.Bytes)
		if err != nil {
			resp.Error = err.Error()
			return nil
		}
	}

	// Store the results only after every scan has passed.
	e.ee.documentOriginal = documentOriginal
	e.ee.signatures = signatures

	resp.DocumentFileName = documentOriginal.Name

	return nil
}
// ExtractorGetDocumentPartArgs is used to pass data to Extractor.GetDocumentPart.
type ExtractorGetDocumentPartArgs struct {
// ID of the extractor slot to use.
ID string
// MaxPartSize is the upper bound, in bytes, on the size of the returned part.
MaxPartSize int
// Rewind resets the read position to the beginning of the document before
// the part is read.
Rewind bool
}
// ExtractorGetDocumentPartResp is used to retrieve data from Extractor.GetDocumentPart.
type ExtractorGetDocumentPartResp struct {
// Error is not "" if any error occurred during the operation.
Error string
// Part of the original document, not larger than MaxPartSize.
Part []byte
// IsFinal signals that there are no more parts to return.
IsFinal bool
}
// GetDocumentPart retrieves parts of the original document in the specified
// slot successively. It should be called after Parse.
//
// Each call returns at most args.MaxPartSize bytes starting at the slot's
// current read position and advances that position. If args.Rewind is set,
// the position is reset to the start of the document first. resp.IsFinal is
// set when the returned part reaches the end of the document.
//
// Failures (slot lookup error, slot without an extractor entry, DDC not yet
// parsed, negative MaxPartSize) are reported through resp.Error; the returned
// error is always nil.
func (t *Extractor) GetDocumentPart(args *ExtractorGetDocumentPartArgs, resp *ExtractorGetDocumentPartResp) error {
	e, err := getStoreEntry(args.ID)
	if err != nil {
		resp.Error = err.Error()
		return nil
	}

	e.mutex.Lock()
	defer e.mutex.Unlock()

	if e.ee == nil {
		resp.Error = "unknown id"
		return nil
	}

	if e.ee.documentOriginal == nil {
		resp.Error = "DDC not parsed"
		return nil
	}

	// A negative size would produce a slice expression with high < low below,
	// which panics. Reject it before touching the slot's state.
	if args.MaxPartSize < 0 {
		resp.Error = "negative MaxPartSize"
		return nil
	}

	if args.Rewind {
		e.ee.documentOriginalBytesRead = 0
	}

	start := e.ee.documentOriginalBytesRead
	bytesRemain := len(e.ee.documentOriginal.Bytes) - start

	partSize := args.MaxPartSize
	if partSize >= bytesRemain {
		// The remainder fits into a single part: this is the last one.
		partSize = bytesRemain
		resp.IsFinal = true
	}

	resp.Part = e.ee.documentOriginal.Bytes[start : start+partSize]
	e.ee.documentOriginalBytesRead = start + partSize

	return nil
}
// ExtractorGetSignatureArgs is used to pass data to Extractor.GetSignature.
type ExtractorGetSignatureArgs struct {
// ID of the extractor slot to use.
ID string
}
// ExtractorGetSignatureResp is used to retrieve data from Extractor.GetSignature.
type ExtractorGetSignatureResp struct {
// Error is not "" if any error occurred during the operation.
Error string
// Signature bytes and file name.
Signature ddc.AttachedFile
// IsFinal signals that there are no more signatures to return.
IsFinal bool
}
// GetSignature retrieves the signatures that have been embedded into the DDC
// successively. It should be called after Parse.
//
// Each call returns the next stored signature and removes it from the slot.
// resp.IsFinal is set when the returned signature was the last one.
//
// Failures (slot lookup error, slot without an extractor entry, DDC not yet
// parsed, no signatures left) are reported through resp.Error; the returned
// error is always nil.
func (t *Extractor) GetSignature(args *ExtractorGetSignatureArgs, resp *ExtractorGetSignatureResp) error {
	e, err := getStoreEntry(args.ID)
	if err != nil {
		resp.Error = err.Error()
		return nil
	}

	e.mutex.Lock()
	defer e.mutex.Unlock()

	if e.ee == nil {
		resp.Error = "unknown id"
		return nil
	}

	if e.ee.signatures == nil {
		resp.Error = "DDC not parsed"
		return nil
	}

	// The slice is non-nil but empty once every signature has been handed
	// out (or if the DDC had none). Indexing it would panic, so report the
	// condition and mark the sequence as finished.
	if len(e.ee.signatures) == 0 {
		resp.Error = "no more signatures"
		resp.IsFinal = true
		return nil
	}

	resp.Signature = e.ee.signatures[0]
	e.ee.signatures = e.ee.signatures[1:]

	if len(e.ee.signatures) == 0 {
		resp.IsFinal = true
	}

	return nil
}
// ExtractorDropArgs is used to pass data to Extractor.Drop.
type ExtractorDropArgs struct {
// ID of the extractor slot to drop.
ID string
}
// ExtractorDropResp is used to retrieve data from Extractor.Drop.
type ExtractorDropResp struct {
// Error is not "" if any error occurred during the operation.
Error string
}
// Drop removes the slot identified by args.ID from the store.
//
// resp is left untouched and the returned error is always nil.
func (t *Extractor) Drop(args *ExtractorDropArgs, resp *ExtractorDropResp) error {
	slotID := args.ID
	deleteStoreEntry(slotID)

	return nil
}