/
cpu.py
239 lines (194 loc) · 7.61 KB
/
cpu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
from __future__ import print_function, absolute_import
import sys
import llvmlite.llvmpy.core as lc
import llvmlite.llvmpy.ee as le
import llvmlite.binding as ll
from numba import _dynfunc, config
from numba.callwrapper import PyCallWrapper
from .base import BaseContext, PYOBJECT
from numba import utils, cgutils, types
from numba.utils import cached_property
from numba.targets import (
callconv, cffiimpl, codegen, externals, intrinsics, listobj, cmathimpl,
mathimpl, npyimpl, operatorimpl, printimpl, randomimpl)
from .options import TargetOptions
from numba.runtime import rtsys
# Keep these structures in sync with _dynfunc.c.
class ClosureBody(cgutils.Structure):
    """
    Structured view of the body of a _dynfunc.Closure object.
    """
    # Single field: the environment object enclosed by the closure.
    _fields = [
        ('env', types.pyobject),
    ]
class EnvBody(cgutils.Structure):
    """
    Structured view of the body of a _dynfunc.Environment object.
    """
    # Field order matters: it has to match the C-side structure.
    _fields = [('globals', types.pyobject), ('consts', types.pyobject)]
class CPUContext(BaseContext):
    """
    BaseContext specialization for the CPU target.

    Supplies the CPU calling convention, the JIT/AOT code generators and
    helpers to reach into the _dynfunc runtime objects.
    """

    # Overrides
    def create_module(self, name):
        """
        Return a new empty module named *name*, created by the internal
        codegen object.
        """
        codegen_obj = self._internal_codegen
        return codegen_obj._create_empty_module(name)

    def init(self):
        """
        Set up the target: record whether the machine is 32-bit, create
        the JIT codegen, install the external C functions and the
        implementation registries, then initialize the NRT runtime.
        """
        self.is32bit = (utils.MACHINE_BITS == 32)
        self._internal_codegen = codegen.JITCPUCodegen("numba.exec")

        # Map external C functions.
        externals.c_math_functions.install(self)
        externals.c_numpy_functions.install(self)

        # Add target specific implementations
        impl_modules = (cmathimpl, cffiimpl, mathimpl, npyimpl,
                        operatorimpl, printimpl, randomimpl)
        for impl in impl_modules:
            self.install_registry(impl.registry)

        # Initialize NRT runtime
        rtsys.initialize(self)

    @property
    def target_data(self):
        """
        The target data of the internal codegen object.
        """
        codegen_obj = self._internal_codegen
        return codegen_obj.target_data

    def with_aot_codegen(self, name):
        """
        Return a subtarget of this context that uses an AOTCPUCodegen
        created with *name* and has ``aot_mode`` set to True.
        """
        aot_codegen = codegen.AOTCPUCodegen(name)
        return self.subtarget(_internal_codegen=aot_codegen, aot_mode=True)

    def codegen(self):
        """
        Return the internal codegen object.
        """
        return self._internal_codegen

    @cached_property
    def call_conv(self):
        """
        The CPUCallConv instance bound to this context (computed once).
        """
        return callconv.CPUCallConv(self)

    def get_env_from_closure(self, builder, clo):
        """
        From the pointer *clo* to a _dynfunc.Closure, get a pointer
        to the enclosed _dynfunc.Environment.
        """
        # A NULL closure is fatal: emit a debug message and mark the
        # branch as unreachable.
        closure_is_null = cgutils.is_null(builder, clo)
        with cgutils.if_unlikely(builder, closure_is_null):
            self.debug_print(builder, "Fatal error: missing _dynfunc.Closure")
            builder.unreachable()

        # The closure body lives at a fixed offset published by _dynfunc.
        body_offset = _dynfunc._impl_info['offsetof_closure_body']
        body_ptr = cgutils.pointer_add(builder, clo, body_offset)
        body = ClosureBody(self, builder, ref=body_ptr, cast_ref=True)
        return body.env

    def get_env_body(self, builder, envptr):
        """
        From the given *envptr* (a pointer to a _dynfunc.Environment object),
        get a EnvBody allowing structured access to environment fields.
        """
        body_offset = _dynfunc._impl_info['offsetof_env_body']
        env_body_ptr = cgutils.pointer_add(builder, envptr, body_offset)
        return EnvBody(self, builder, ref=env_body_ptr, cast_ref=True)

    def get_generator_state(self, builder, genptr, return_type):
        """
        From the given *genptr* (a pointer to a _dynfunc.Generator object),
        get a pointer to its state area.

        *return_type* is forwarded to cgutils.pointer_add().
        """
        state_offset = _dynfunc._impl_info['offsetof_generator_state']
        return cgutils.pointer_add(builder, genptr, state_offset,
                                   return_type=return_type)

    def build_list(self, builder, list_type, items):
        """
        Build a list from the Numba *list_type* and its initial *items*.
        """
        new_list = listobj.build_list(self, builder, list_type, items)
        return new_list

    def post_lowering(self, mod, library):
        """
        Post-process the lowered module *mod* and make *library* link
        against the NRT library.
        """
        if self.is32bit:
            # 32-bit machine needs to replace all 64-bit div/rem to avoid
            # calls to compiler-rt
            intrinsics.fix_divmod(mod)

        nrt_library = rtsys.library
        library.add_linking_library(nrt_library)

    def create_cpython_wrapper(self, library, fndesc, env, call_helper,
                               release_gil=False):
        """
        Build the CPython wrapper for the function described by *fndesc*
        in a fresh "wrapper" module and add that module to *library*.
        """
        mod = self.create_module("wrapper")

        # Declare the wrapped function inside the wrapper module.
        callee_type = self.call_conv.get_function_type(fndesc.restype,
                                                       fndesc.argtypes)
        callee = mod.add_function(callee_type, fndesc.llvm_func_name)

        wrapper = PyCallWrapper(self, mod, callee, fndesc, env,
                                call_helper=call_helper,
                                release_gil=release_gil)
        wrapper.build()
        library.add_ir_module(mod)

    def get_executable(self, library, fndesc, env):
        """
        Create the Python-callable object for a compiled function.

        The CPython wrapper named by *fndesc.llvm_cpython_wrapper_name* is
        looked up in *library* and handed to _dynfunc.make_function()
        together with *env* (an execution environment from _dynfunc).

        Returns
        -------
        cfunc
            the object returned by _dynfunc.make_function()
        """
        # Code generation
        # The address of the native function is looked up but not used
        # below.
        # NOTE(review): presumably kept for its effect on the library --
        # confirm before removing.
        library.get_pointer_to_function(fndesc.llvm_func_name)
        wrapper_addr = library.get_pointer_to_function(
            fndesc.llvm_cpython_wrapper_name)

        module = fndesc.lookup_module()
        short_name = fndesc.qualname.split('.')[-1]
        # objects to keepalive with the function
        keepalive = (library,)
        return _dynfunc.make_function(module, short_name, fndesc.doc,
                                      wrapper_addr, env, keepalive)

    def calc_array_sizeof(self, ndim):
        '''
        Calculate the size of an array struct on the CPU target
        '''
        # The size is computed for an int32 array with layout 'A' and
        # *ndim* dimensions.
        array_type = types.Array(types.int32, ndim, 'A')
        value_type = self.get_value_type(array_type)
        return self.get_abi_sizeof(value_type)
# ----------------------------------------------------------------------------
# TargetOptions
class CPUTargetOptions(TargetOptions):
    """
    Target options of the CPU target.

    OPTIONS maps every option name to ``bool``.
    """
    OPTIONS = dict.fromkeys(
        (
            "nopython",
            "nogil",
            "forceobj",
            "looplift",
            "boundcheck",
            "_nrt",
            "no_rewrites",
        ),
        bool,
    )
# ----------------------------------------------------------------------------
# Internal
def remove_refct_calls(func):
    """
    Remove redundant incref/decref calls on a per basic block basis.

    Every basic block of *func* goes through the NULL-pointer pass first
    and the incref/decref pair pass second.
    """
    for block in func.basic_blocks:
        for cleanup in (remove_null_refct_call, remove_refct_pairs):
            cleanup(block)
def remove_null_refct_call(bb):
    """
    Remove refct api calls to NULL pointer

    Currently a no-op: *bb* is not inspected and None is returned.
    """
    # Skipped for now.  Disabled draft of the pass:
    #
    # for inst in bb.instructions:
    #     if isinstance(inst, lc.CallOrInvokeInstruction):
    #         fname = inst.called_function.name
    #         if fname == "Py_IncRef" or fname == "Py_DecRef":
    #             arg = inst.args[0]
    #             print(type(arg))
    #             if isinstance(arg, lc.ConstantPointerNull):
    #                 inst.erase_from_parent()
    return None
def remove_refct_pairs(bb):
    """
    Remove incref decref pairs on the same variable

    Only a Py_DecRef that comes *after* a Py_IncRef of the same operand
    inside the basic block *bb* is cancelled, and it is cancelled against
    the closest preceding Py_IncRef.  Pairing without regard to order
    could erase the wrong calls: for ``incref X; decref X; ...; decref X``
    it would keep the early decref and drop the late one, releasing the
    reference before the code in between is done with it.

    The block is rescanned after each removal until no pair is left.
    Instructions are erased from *bb* in place; nothing is returned.
    """
    didsomething = True
    while didsomething:
        didsomething = False
        # Latest Py_IncRef seen so far in this scan, keyed by its operand.
        increfs = {}
        for inst in bb.instructions:
            if not isinstance(inst, lc.CallOrInvokeInstruction):
                continue
            fname = inst.called_function.name
            # NOTE(review): operands[0] is assumed to be the pointer being
            # incref'ed/decref'ed -- confirm against the instruction API.
            if fname == "Py_IncRef":
                increfs[inst.operands[0]] = inst
            elif fname == "Py_DecRef":
                incref = increfs.get(inst.operands[0])
                if incref is not None:
                    incref.erase_from_parent()
                    inst.erase_from_parent()
                    didsomething = True
                    # The instruction list was just modified: restart the
                    # scan instead of iterating over it any further.
                    break