forked from mandiant/GoReSym
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extractor.go
314 lines (273 loc) · 10.9 KB
/
extractor.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
package extractor
import (
"bytes"
"debug/buildinfo"
"fmt"
"os"
"runtime/debug"
"strings"
"github.com/mihailkirov/GoReSym/buildid"
"github.com/mihailkirov/GoReSym/objfile"
)
// ExtractSymbols extracts the metadata from the binary at fileName.
// It is a thin, exported wrapper around main_impl so that other Go code can
// run the same extraction the command-line entry point performs.
//
// The flags and versionOverride are forwarded unchanged; typeAddress is
// passed as main_impl's manualTypeAddress argument. On failure the returned
// pointer is nil and the error from main_impl is returned as-is.
func ExtractSymbols(fileName string, printStdPkgs, printFilePaths, printTypes bool,
	typeAddress int, versionOverride string) (*ExtractMetadata, error) {
	result, err := main_impl(fileName, printStdPkgs, printFilePaths, printTypes, typeAddress, versionOverride)
	if err == nil {
		return &result, nil
	}
	return nil, err
}
// isStdPackage reports whether pkg should be treated as a standard package:
// either it is blank (empty or whitespace only) or it is equal to one of the
// entries in standardPackages.
func isStdPackage(pkg string) bool {
	// A blank name is common for reflect/type functions and some runtime
	// symbols, so those are counted as standard.
	if strings.TrimSpace(pkg) == "" {
		return true
	}

	known := false
	for _, candidate := range standardPackages {
		if candidate == pkg {
			known = true
			break
		}
	}
	return known
}
// PcLnTabMetadata holds the pclntab header info of the pclntab candidate
// that main_impl examined.
type PcLnTabMetadata struct {
	// VA is the candidate's PclntabVA.
	VA uint64
	// Version is the string form of the parsed table's Go12line.Version.
	Version string
	// Endianess is the string form of the parsed table's Go12line.Binary;
	// main_impl compares it against "LittleEndian".
	Endianess string
	// CpuQuantum is the parsed table's Go12line.Quantum, the minimal unit for
	// a program counter.
	CpuQuantum uint32
	// CpuQuantumStr is a human-readable architecture hint derived from
	// CpuQuantum ("x86/x64/wasm", "s390x" or "arm/mips/ppc/riscv").
	CpuQuantumStr string
	// PointerSize is the parsed table's Go12line.Ptrsize; main_impl treats a
	// value of 8 as a 64-bit binary.
	PointerSize uint32
}
// FuncMetadata describes one function taken from the parsed pclntab.
type FuncMetadata struct {
	// Start is the function's Entry value from the pclntab.
	Start uint64
	// End is the function's End value from the pclntab.
	End uint64
	// PackageName is the result of the function's PackageName() method.
	PackageName string
	// FullName is the function's Name from the pclntab.
	FullName string
}
// ExtractMetadata is the complete result of one extraction run (see main_impl).
type ExtractMetadata struct {
	// Version is the Go version, reduced to its numeric part (go1.17 -> 1.17).
	Version string
	// BuildId is the value returned by buildid.ReadFile, or empty if that failed.
	BuildId string
	// Arch is the GOARCH build setting, or file.GOARCH() when the setting is absent.
	Arch string
	// OS is the GOOS build setting, or a best-effort guess from runtime source
	// file names found in the binary.
	OS string
	// TabMeta is the header info of the pclntab candidate that was examined.
	TabMeta PcLnTabMetadata
	// ModuleMeta is a copy of the moduledata located via the pclntab.
	ModuleMeta objfile.ModuleData
	// Types holds the parsed types; filled only when type parsing was requested
	// and succeeded.
	Types []objfile.Type
	// Interfaces holds the result of ParseITabLinks; filled only when printTypes
	// was set without a manual type address and parsing succeeded.
	Interfaces []objfile.Type
	// BuildInfo is a copy of what buildinfo.ReadFile returned; zero value if
	// reading it failed.
	BuildInfo debug.BuildInfo
	// Files lists the keys of the parsed pclntab's Files; filled only when
	// printFilePaths is set.
	Files []string
	// UserFunctions lists functions whose package is not a standard package
	// according to isStdPackage.
	UserFunctions []FuncMetadata
	// StdFunctions lists standard-package functions; filled only when
	// printStdPkgs is set.
	StdFunctions []FuncMetadata
}
// main_impl_tmpfile runs main_impl on an in-memory binary.
//
// fileBytes is written to a freshly created temporary file (prefix
// "goresym_tmp-" inside os.TempDir()), the file is closed, and its path is
// handed to main_impl together with the remaining arguments, which are
// forwarded unchanged. The temporary file is removed when the function
// returns.
//
// An error is returned when the temporary file cannot be created, written or
// closed (the underlying error is wrapped and can be inspected with
// errors.Is / errors.As); otherwise the result of main_impl is returned.
func main_impl_tmpfile(fileBytes []byte, printStdPkgs bool, printFilePaths bool, printTypes bool, manualTypeAddress int, versionOverride string) (metadata ExtractMetadata, err error) {
	tmpFile, err := os.CreateTemp(os.TempDir(), "goresym_tmp-")
	if err != nil {
		return ExtractMetadata{}, fmt.Errorf("failed to create temporary file: %w", err)
	}
	// Deferred first so the file is removed on every return path below.
	defer os.Remove(tmpFile.Name())

	if _, err = tmpFile.Write(fileBytes); err != nil {
		// Release the handle before returning: leaving it open leaks a
		// descriptor and can prevent the deferred removal on platforms that
		// refuse to delete open files. The write error is the one worth
		// reporting, so a secondary close error is intentionally dropped.
		_ = tmpFile.Close()
		return ExtractMetadata{}, fmt.Errorf("failed to write bytes to temporary file: %w", err)
	}

	// Close before parsing so main_impl reopens a fully flushed file by name.
	if err := tmpFile.Close(); err != nil {
		return ExtractMetadata{}, fmt.Errorf("failed to close temporary file: %w", err)
	}

	return main_impl(tmpFile.Name(), printStdPkgs, printFilePaths, printTypes, manualTypeAddress, versionOverride)
}
// main_impl extracts build and symbol metadata from the Go binary at fileName.
//
// Parameters:
//   - fileName: path of the binary; it is opened with objfile.Open and also
//     read by buildid.ReadFile, buildinfo.ReadFile and (for the brute-force
//     fallbacks) os.ReadFile.
//   - printStdPkgs: when true, functions from standard packages are collected
//     into StdFunctions; otherwise they are skipped.
//   - printFilePaths: when true, the file names recorded in the pclntab are
//     collected into Files (in map iteration order, i.e. unspecified order).
//   - printTypes: when true and manualTypeAddress is 0, typelinks and
//     itablinks are parsed into Types and Interfaces.
//   - manualTypeAddress: when non-zero, only the type at this address is
//     parsed into Types, regardless of printTypes.
//   - versionOverride: when non-empty, replaces the detected Go version and
//     is also passed to the pclntab search.
//
// Build ID, build info and type parsing are best effort: their failures leave
// the corresponding fields empty. An error is returned only when the file
// cannot be opened, the pclntab cannot be read, no pclntab candidate exists,
// or no candidate leads to a valid moduledata.
func main_impl(fileName string, printStdPkgs bool, printFilePaths bool, printTypes bool, manualTypeAddress int, versionOverride string) (metadata ExtractMetadata, err error) {
	extractMetadata := ExtractMetadata{}

	file, err := objfile.Open(fileName)
	if err != nil {
		return ExtractMetadata{}, fmt.Errorf("invalid file: %w", err)
	}

	// The build ID is optional; BuildId simply stays empty when it cannot be read.
	if buildId, err := buildid.ReadFile(fileName); err == nil {
		extractMetadata.BuildId = buildId
	}

	// try to get version the 'correct' way, also fill out buildSettings if parsing was ok
	if bi, err := buildinfo.ReadFile(fileName); err == nil {
		extractMetadata.Version = bi.GoVersion
		for _, setting := range bi.Settings {
			switch setting.Key {
			case "GOOS":
				extractMetadata.OS = setting.Value
			case "GOARCH":
				extractMetadata.Arch = setting.Value
			}
		}
		extractMetadata.BuildInfo = *bi
	}

	// Optionally bruteforce any one of these, but only if they weren't previously found in the buildinfo
	if extractMetadata.OS == "" || extractMetadata.Arch == "" || extractMetadata.Version == "" {
		if extractMetadata.Arch == "" {
			extractMetadata.Arch = file.GOARCH()
		}

		if fileData, readErr := os.ReadFile(fileName); readErr == nil {
			if extractMetadata.Version == "" {
				extractMetadata.Version = bruteforceGoVersion(fileData)
			}
			if extractMetadata.OS == "" {
				extractMetadata.OS = bruteforceGOOS(fileData)
			}
		}
	}

	// Apply the override and reduce the version to its numeric part exactly
	// once. Neither step depends on the pclntab candidate, so doing it up
	// front keeps the result independent of how many candidates get tried.
	if len(versionOverride) > 0 {
		extractMetadata.Version = versionOverride
	}
	extractMetadata.Version = numericGoVersion(extractMetadata.Version)

	var knownPclntabVA = uint64(0)
	var knownGoTextBase = uint64(0)
restartParseWithRealTextBase:
	tabs, err := file.PCLineTable(versionOverride, knownPclntabVA, knownGoTextBase)
	if err != nil {
		return ExtractMetadata{}, fmt.Errorf("failed to read pclntab: %w", err)
	}

	if len(tabs) == 0 {
		return ExtractMetadata{}, fmt.Errorf("no pclntab candidates found")
	}

	var moduleData *objfile.ModuleData
	finalTab := &tabs[0]
	for i := range tabs {
		// Point at the slice element rather than at a per-iteration copy.
		tab := &tabs[i]

		extractMetadata.TabMeta.CpuQuantum = tab.ParsedPclntab.Go12line.Quantum

		// quantum is the minimal unit for a program counter (1 on x86, 4 on most other systems).
		// 386: 1, amd64: 1, arm: 4, arm64: 4, mips: 4, mips/64/64le/64be: 4, ppc64/64le: 4, riscv64: 4, s390x: 2, wasm: 1
		switch extractMetadata.TabMeta.CpuQuantum {
		case 2:
			extractMetadata.TabMeta.CpuQuantumStr = "s390x"
		case 4:
			extractMetadata.TabMeta.CpuQuantumStr = "arm/mips/ppc/riscv"
		default:
			extractMetadata.TabMeta.CpuQuantumStr = "x86/x64/wasm"
		}

		extractMetadata.TabMeta.VA = tab.PclntabVA
		extractMetadata.TabMeta.Version = tab.ParsedPclntab.Go12line.Version.String()
		extractMetadata.TabMeta.Endianess = tab.ParsedPclntab.Go12line.Binary.String()
		extractMetadata.TabMeta.PointerSize = tab.ParsedPclntab.Go12line.Ptrsize

		// this can be a little tricky to locate and parse properly across all go versions
		// since moduledata holds a pointer to the pclntab, we can (hopefully) find the right candidate by using it to find the moduledata.
		// if that location works, then we must have given it the correct pclntab VA. At least in theory...
		// The resolved offsets within the pclntab might have used the wrong base though! We'll fix that later.
		_, tmpModData, err := file.ModuleDataTable(tab.PclntabVA, extractMetadata.Version, extractMetadata.TabMeta.Version, extractMetadata.TabMeta.PointerSize == 8, extractMetadata.TabMeta.Endianess == "LittleEndian")
		if err != nil || tmpModData == nil {
			continue
		}

		// if the search candidate relied on a moduledata va, make sure it lines up with ours now
		stomppedMagicMetaConstraintsValid := true
		if tab.StompMagicCandidateMeta != nil {
			stomppedMagicMetaConstraintsValid = tab.StompMagicCandidateMeta.SuspectedModuleDataVa == tmpModData.VA
		}

		if knownGoTextBase == 0 && knownPclntabVA == 0 && stomppedMagicMetaConstraintsValid {
			// assign real base and restart pclntab parsing with correct VAs!
			knownGoTextBase = tmpModData.TextVA
			knownPclntabVA = tab.PclntabVA
			goto restartParseWithRealTextBase
		}

		// we already have pclntab candidates with the right VA, but which candidate?? The one that finds a valid moduledata!
		finalTab = tab
		moduleData = tmpModData
		break
	}

	// to be sure we got the right pclntab we had to have found a moduledata as well. If we didn't, then we failed to find the pclntab (correctly) as well
	if moduleData == nil {
		return ExtractMetadata{}, fmt.Errorf("no valid pclntab or moduledata found")
	}

	extractMetadata.ModuleMeta = *moduleData

	is64bit := extractMetadata.TabMeta.PointerSize == 8
	littleEndian := extractMetadata.TabMeta.Endianess == "LittleEndian"

	// Type parsing is best effort: a failure leaves the field empty.
	switch {
	case manualTypeAddress != 0:
		// A manual address wins over printTypes and yields only that type.
		types, err := file.ParseType(extractMetadata.Version, moduleData, uint64(manualTypeAddress), is64bit, littleEndian)
		if err == nil {
			extractMetadata.Types = types
		}
	case printTypes:
		types, err := file.ParseTypeLinks(extractMetadata.Version, moduleData, is64bit, littleEndian)
		if err == nil {
			extractMetadata.Types = types
		}

		interfaces, err := file.ParseITabLinks(extractMetadata.Version, moduleData, is64bit, littleEndian)
		if err == nil {
			extractMetadata.Interfaces = interfaces
		}
	}

	if printFilePaths {
		for k := range finalTab.ParsedPclntab.Files {
			extractMetadata.Files = append(extractMetadata.Files, k)
		}
	}

	for _, elem := range finalTab.ParsedPclntab.Funcs {
		pkg := elem.PackageName()
		fn := FuncMetadata{
			Start:       elem.Entry,
			End:         elem.End,
			PackageName: pkg,
			FullName:    elem.Name,
		}

		if !isStdPackage(pkg) {
			extractMetadata.UserFunctions = append(extractMetadata.UserFunctions, fn)
		} else if printStdPkgs {
			extractMetadata.StdFunctions = append(extractMetadata.StdFunctions, fn)
		}
	}

	return extractMetadata, nil
}

// numericGoVersion reduces a Go version string to its numeric part, e.g.
// "go1.17" -> "1.17" and
// "devel go1.18-2d1d548 Tue Dec 21 03:55:43 2021 +0000" -> "1.18".
// A string that does not contain "go" is returned unchanged.
func numericGoVersion(version string) string {
	idx := strings.Index(version, "go")
	if idx == -1 {
		return version
	}
	version = version[idx+2:]
	// "1.18-2d1d548 Tue Dec ..." -> "1.18-2d1d548"
	version, _, _ = strings.Cut(version, " ")
	// "1.18-2d1d548" -> "1.18"
	version, _, _ = strings.Cut(version, "-")
	return version
}

// bruteforceGoVersion looks for the first "go1." in fileData and returns it
// together with the digits and dots that directly follow (at most 6 bytes are
// inspected). It returns "" when the marker is absent or too close to the end
// of the data.
func bruteforceGoVersion(fileData []byte) string {
	const prefix = "go1."
	const window = 6

	idx := bytes.Index(fileData, []byte(prefix))
	if idx == -1 || len(fileData)-idx <= len(prefix)+window {
		return ""
	}

	// the string is _not_ null terminated, nor length delimited. So, filter till first non-numeric ascii
	tail := fileData[idx+len(prefix) : idx+len(prefix)+window]
	end := 0
	for end < len(tail) {
		c := tail[end]
		// careful not to end with a . at the end
		nextIsNumeric := end+1 < len(tail) && isASCIIDigitByte(tail[end+1])
		if !isASCIIDigitByte(c) && !(c == '.' && nextIsNumeric) {
			break
		}
		end++
	}
	return prefix + string(tail[:end])
}

// bruteforceGOOS guesses the target OS from runtime source file names embedded
// in fileData: first from https://github.com/golang/go/tree/master/src/runtime/os_<os name>.go,
// then from the asm file name rt0_<os name>_<arch>.s when the first lookup
// produced nothing. It returns "" when neither yields a name.
func bruteforceGOOS(fileData []byte) string {
	if name := asciiNameAfter(fileData, "/src/runtime/os_"); name != "" {
		return name
	}
	return asciiNameAfter(fileData, "/src/runtime/rt0_")
}

// asciiNameAfter finds the first occurrence of needle in data and returns the
// run of ASCII letters and digits that directly follows it, inspecting at most
// 20 bytes. The run therefore ends at the first '.', '_', space or any other
// non-alphanumeric byte. It returns "" when needle is absent or fewer than 21
// bytes follow it.
func asciiNameAfter(data []byte, needle string) string {
	const maxNameLen = 20

	idx := bytes.Index(data, []byte(needle))
	if idx == -1 || len(data)-idx <= len(needle)+maxNameLen {
		return ""
	}

	start := idx + len(needle)
	window := data[start : start+maxNameLen]
	end := 0
	for end < len(window) && isASCIIAlnumByte(window[end]) {
		end++
	}
	return string(window[:end])
}

// isASCIIDigitByte reports whether c is an ASCII digit.
func isASCIIDigitByte(c byte) bool {
	return c >= '0' && c <= '9'
}

// isASCIIAlnumByte reports whether c is an ASCII letter or digit.
func isASCIIAlnumByte(c byte) bool {
	return isASCIIDigitByte(c) || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
}