Skip to content

Commit e2b81e0

Browse files
committed
Parse MachO Dyld Info, add Python API and update doc
1 parent ffca52f commit e2b81e0

File tree

11 files changed

+634
-43
lines changed

11 files changed

+634
-43
lines changed

api/python/MachO/CMakeLists.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ set(LIEF_PYTHON_MACHO_SRC
1111
"${CMAKE_CURRENT_LIST_DIR}/objects/pyUUID.cpp"
1212
"${CMAKE_CURRENT_LIST_DIR}/objects/pyMainCommand.cpp"
1313
"${CMAKE_CURRENT_LIST_DIR}/objects/pyDylinker.cpp"
14-
"${CMAKE_CURRENT_LIST_DIR}/pyMachOStructures.cpp")
14+
"${CMAKE_CURRENT_LIST_DIR}/objects/pyDyldInfo.cpp"
15+
"${CMAKE_CURRENT_LIST_DIR}/pyMachOStructures.cpp"
16+
)
1517

1618
set(LIEF_PYTHON_MACHO_HDR
1719
"${CMAKE_CURRENT_LIST_DIR}/pyMachO.hpp")
+217
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
/* Copyright 2017 R. Thomas
2+
* Copyright 2017 Quarkslab
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#include <algorithm>
17+
18+
#include <string>
19+
#include <sstream>
20+
21+
#include "LIEF/visitors/Hash.hpp"
22+
#include "LIEF/MachO/DyldInfo.hpp"
23+
24+
#include "pyMachO.hpp"
25+
26+
template<class T>
27+
using getter_t = T (DyldInfo::*)(void) const;
28+
29+
template<class T>
30+
using setter_t = void (DyldInfo::*)(T);
31+
32+
33+
// Register the Python binding of DyldInfo on module ``m``.
//
// The class is exposed as ``DyldInfo`` with LoadCommand as its base. It provides:
//   - five read/write properties (rebase, bind, weak_bind, lazy_bind,
//     export_info), each bound to the matching getter/setter overload pair,
//   - one ``set_<area>_offset`` / ``set_<area>_size`` method per area,
//   - ``__eq__``, ``__ne__``, ``__hash__`` and ``__str__``.
void init_MachO_DyldInfo_class(py::module& m) {

  // Shorthand for the type carried by every property below.
  using info_t = DyldInfo::info_t;

  py::class_<DyldInfo, LoadCommand>(m, "DyldInfo")

    .def_property("rebase",
        static_cast<getter_t<const info_t&>>(&DyldInfo::rebase),
        static_cast<setter_t<const info_t&>>(&DyldInfo::rebase),
        "*Rebase* information as a tuple ``(offset, size)``\n\n"

        "Dyld rebases an image whenever dyld loads it at an address different \n"
        "from its preferred address. The rebase information is a stream \n"
        "of byte sized opcodes whose symbolic names start with ``REBASE_OPCODE_``. \n"
        "Conceptually the rebase information is a table of tuples: \n"
        "``(seg-index, seg-offset, type)``\n"
        "The opcodes are a compressed way to encode the table by only \n"
        "encoding when a column changes. In addition simple patterns \n"
        "like \"every n'th offset for m times\" can be encoded in a few \n"
        "bytes.\n\n"

        ".. seealso::\n\n"
        "\t``/usr/include/mach-o/loader.h``\n",
        py::return_value_policy::reference_internal)


    .def_property("bind",
        static_cast<getter_t<const info_t&>>(&DyldInfo::bind),
        static_cast<setter_t<const info_t&>>(&DyldInfo::bind),
        "*Bind* information as a tuple ``(offset, size)``\n\n"

        "Dyld binds an image during the loading process, if the image\n"
        "requires any pointers to be initialized to symbols in other images.\n"
        // Fixed: this sentence previously said "rebase" (copy-paste from the property above).
        "The bind information is a stream of byte sized\n"
        "opcodes whose symbolic names start with ``BIND_OPCODE_``.\n"
        "Conceptually the bind information is a table of tuples:\n"
        "``(seg-index, seg-offset, type, symbol-library-ordinal, symbol-name, addend)``\n"
        "The opcodes are a compressed way to encode the table by only\n"
        "encoding when a column changes. In addition simple patterns\n"
        "like for runs of pointers initialized to the same value can be\n"
        "encoded in a few bytes.\n\n"

        ".. seealso::\n\n"
        "\t``/usr/include/mach-o/loader.h``\n",
        py::return_value_policy::reference_internal)


    .def_property("weak_bind",
        static_cast<getter_t<const info_t&>>(&DyldInfo::weak_bind),
        static_cast<setter_t<const info_t&>>(&DyldInfo::weak_bind),
        "*Weak Bind* information as a tuple ``(offset, size)``\n\n"

        "Some C++ programs require dyld to unique symbols so that all\n"
        "images in the process use the same copy of some code/data.\n"
        "This step is done after binding. The content of the weak_bind\n"
        "info is an opcode stream like the bind_info. But it is sorted\n"
        "alphabetically by symbol name. This enables dyld to walk\n"
        "all images with weak binding information in order and look\n"
        "for collisions. If there are no collisions, dyld does\n"
        "no updating. That means that some fixups are also encoded\n"
        "in the bind_info. For instance, all calls to ``operator new`` \n"
        "are first bound to ``libstdc++.dylib`` using the information\n"
        "in bind_info. Then if some image overrides operator new\n"
        "that is detected when the weak_bind information is processed\n"
        "and the call to operator new is then rebound.\n\n"

        ".. seealso::\n\n"
        "\t``/usr/include/mach-o/loader.h``\n",
        py::return_value_policy::reference_internal)


    .def_property("lazy_bind",
        static_cast<getter_t<const info_t&>>(&DyldInfo::lazy_bind),
        static_cast<setter_t<const info_t&>>(&DyldInfo::lazy_bind),
        "*Lazy Bind* information as a tuple ``(offset, size)``\n\n"

        "Some uses of external symbols do not need to be bound immediately.\n"
        "Instead they can be lazily bound on first use. The lazy_bind\n"
        "area contains a stream of BIND opcodes to bind all lazy symbols.\n"
        "Normal use is that dyld ignores the lazy_bind section when\n"
        "loading an image. Instead the static linker arranged for the\n"
        "lazy pointer to initially point to a helper function which\n"
        "pushes the offset into the lazy_bind area for the symbol\n"
        "needing to be bound, then jumps to dyld which simply adds\n"
        "the offset to lazy_bind_off to get the information on what\n"
        "to bind.\n\n"

        ".. seealso::\n\n"
        "\t``/usr/include/mach-o/loader.h``\n",
        py::return_value_policy::reference_internal)


    .def_property("export_info",
        static_cast<getter_t<const info_t&>>(&DyldInfo::export_info),
        static_cast<setter_t<const info_t&>>(&DyldInfo::export_info),
        "*Export* information as a tuple ``(offset, size)``\n\n"

        "The symbols exported by a dylib are encoded in a trie. This\n"
        "is a compact representation that factors out common prefixes.\n"
        "It also reduces ``LINKEDIT`` pages in RAM because it encodes all\n"
        "information (name, address, flags) in one small, contiguous range.\n"
        "The export area is a stream of nodes. The first node sequentially\n"
        "is the start node for the trie.\n\n"

        "Nodes for a symbol start with a byte that is the length of\n"
        "the exported symbol information for the string so far.\n"
        "If there is no exported symbol, the byte is zero. If there\n"
        "is exported info, it follows the length byte. The exported\n"
        "info normally consists of a flags and offset both encoded\n"
        "in uleb128. The offset is location of the content named\n"
        "by the symbol. It is the offset from the mach_header for\n"
        "the image.\n\n"

        "After the initial byte and optional exported symbol information\n"
        "is a byte of how many edges (0-255) that this node has leaving\n"
        "it, followed by each edge.\n"
        "Each edge is a zero terminated cstring of the additional chars\n"
        "in the symbol, followed by a uleb128 offset for the node that\n"
        "edge points to.\n\n"

        ".. seealso::\n\n"
        "\t``/usr/include/mach-o/loader.h``\n",
        py::return_value_policy::reference_internal)

    // Per-area setters: each takes a single keyword-capable argument.
    .def("set_rebase_offset",
        &DyldInfo::set_rebase_offset,
        "offset"_a)

    .def("set_rebase_size",
        &DyldInfo::set_rebase_size,
        "size"_a)


    .def("set_bind_offset",
        &DyldInfo::set_bind_offset,
        "offset"_a)

    .def("set_bind_size",
        &DyldInfo::set_bind_size,
        "size"_a)


    .def("set_weak_bind_offset",
        &DyldInfo::set_weak_bind_offset,
        "offset"_a)

    .def("set_weak_bind_size",
        &DyldInfo::set_weak_bind_size,
        "size"_a)


    .def("set_lazy_bind_offset",
        &DyldInfo::set_lazy_bind_offset,
        "offset"_a)

    .def("set_lazy_bind_size",
        &DyldInfo::set_lazy_bind_size,
        "size"_a)


    .def("set_export_offset",
        &DyldInfo::set_export_offset,
        "offset"_a)

    .def("set_export_size",
        &DyldInfo::set_export_size,
        "size"_a)


    .def("__eq__", &DyldInfo::operator==)
    .def("__ne__", &DyldInfo::operator!=)
    .def("__hash__",
        [] (const DyldInfo& info) {
          return LIEF::Hash::hash(info);
        })


    // __str__ reuses the C++ stream insertion operator of DyldInfo.
    .def("__str__",
        [] (const DyldInfo& info)
        {
          std::ostringstream stream;
          stream << info;
          return stream.str();
        });

}

api/python/MachO/pyMachO.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ void init_MachO_module(py::module& m) {
3737
init_MachO_UUIDCommand_class(LIEF_MachO_module);
3838
init_MachO_MainCommand_class(LIEF_MachO_module);
3939
init_MachO_DylinkerCommand_class(LIEF_MachO_module);
40+
init_MachO_DyldInfo_class(LIEF_MachO_module);
4041

4142

4243
// Enums

api/python/MachO/pyMachO.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ void init_MachO_Symbol_class(py::module&);
3737
void init_MachO_UUIDCommand_class(py::module&);
3838
void init_MachO_MainCommand_class(py::module&);
3939
void init_MachO_DylinkerCommand_class(py::module&);
40+
void init_MachO_DyldInfo_class(py::module&);
4041

4142
// Enums
4243
void init_MachO_Structures_enum(py::module&);

doc/sphinx/api/cpp/macho.rst

+8
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,11 @@ UUIDCommand
116116

117117
.. doxygenclass:: LIEF::MachO::UUIDCommand
118118
:project: lief
119+
120+
----------
121+
122+
Dyld Info
123+
*********
124+
125+
.. doxygenclass:: LIEF::MachO::DyldInfo
126+
:project: lief

doc/sphinx/api/python/macho.rst

+9-2
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,6 @@ MainCommand
111111
----------
112112

113113

114-
115-
116114
Symbol
117115
******
118116

@@ -121,11 +119,20 @@ Symbol
121119
:inherited-members:
122120
:undoc-members:
123121

122+
----------
123+
124+
Dyld Info
125+
*********
124126

127+
.. autoclass:: lief.MachO.DyldInfo
128+
:members:
129+
:inherited-members:
130+
:undoc-members:
125131

126132
----------
127133

128134

135+
129136
Enum
130137
****
131138

examples/python/macho_reader.py

100644 → 100755
File mode changed.

0 commit comments

Comments
 (0)